suppressPackageStartupMessages(
  library(tidyverse)
)
library(leaflet)

Establish file path to the project directory. This will be used to access data.

# Resolve the RStudio project root so data paths work regardless of the
# current working directory; all raw inputs live under data/2019-04-30.
project.path <- rprojroot::find_rstudio_root_file()
data.path <- file.path(project.path, "data", "2019-04-30")

Data Preprocessing

Functions

clean_up

The clean_up() function is used to aid in the standardization of the data. The processes described below generally eliminate some of the most common string features that can cause erroneous mismatching.

  1. tolower()- all column names and character column values are converted to lower case strings.
  2. trimws() - leading and trailing whitespace is removed from all column names and character columns.
  3. gsub()- more than one space is removed from strings in character columns.
clean_up <- function(x) {
  # Standardize a data frame to reduce erroneous mismatches when joining:
  #   1. Column names are lower-cased and trimmed of surrounding whitespace.
  #   2. Character columns are lower-cased and trimmed.
  #   3. Runs of two or more spaces in character columns are collapsed to a
  #      single space.
  #
  # Args:
  #   x: a data.frame or tibble; non-character columns pass through as-is.
  # Returns:
  #   x with standardized names and character columns (same class as input).
  names(x) <- trimws(tolower(names(x)))
  
  char.cols <- vapply(x, is.character, logical(1))
  x[char.cols] <- lapply(x[char.cols], function(col) {
    col <- trimws(tolower(col))
    # Collapse repeated spaces. The original pattern "[' ']{2,}" was a
    # character class of apostrophe-or-space, so it also mangled runs of
    # quotes; " {2,}" matches only repeated spaces.
    gsub(" {2,}", " ", col)
  })
  
  x
}

Stations

On 4/30/2019, A.J. Smith suggested that we drop stations sampled for HABs monitoring (i.e., “13-gunk-40.3”, “13-gunk_t35-0.2”, “13-gunk-37.7”, and “13-lgun-6.0”).

# HABs monitoring stations to exclude from all analyses
# (per A.J. Smith, 2019-04-30).
ex.station.vec <- c("13-gunk-40.3",
                    "13-gunk_t35-0.2",
                    "13-gunk-37.7",
                    "13-lgun-6.0")

Station Information

Import Wallkill 2017 and 2018 station information and combine these into a single data frame. 2018 special study station “13-walk-0.8” is considered equivalent to “13-walk-0.7”, and therefore is relabeled as “13-walk-0.7”.

# Import the 2017 station table. Blank-like strings become NA, clean_up()
# standardizes names/character values, and the site-id and survey columns
# are renamed to the common schema used across years.
stations17.df <- file.path(data.path,
                           "sites",
                           "WallkillSites2017.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings=c(""," ","NA")) %>% 
  clean_up() %>% 
  rename(station = sbu.id,
         survey = ras)

# Import the 2018 station table with the same standardization as 2017.
# Special-study station "13-walk-0.8" is treated as equivalent to
# "13-walk-0.7" and relabeled accordingly.
stations18.df <- file.path(data.path,
                           "sites",
                           "WallkillSites2018.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings=c(""," ","NA")) %>% 
  clean_up() %>% 
  rename(station = site_id,
         stream = name) %>% 
  mutate(station = if_else(station %in% "13-walk-0.8", "13-walk-0.7", station))

# Combine both years. Within mutate(), rivmile/location are derived from the
# pre-prefix station value (station itself is reassigned last), then the
# "13-" basin prefix is added and the excluded HABs stations are dropped.
# NOTE(review): assumes the raw site ids are unprefixed (e.g. "walk-19.0");
# confirm against the source CSVs.
stations.df <- bind_rows(stations17.df, stations18.df) %>% 
  mutate(rivmile = gsub(".*-", "", station),
         location = gsub("-.*", "", station),
         station = paste("13", station, sep = "-")) %>% 
  filter(!station %in% ex.station.vec)

Clear the global environment of unnecessary objects.

rm(stations17.df, stations18.df)

PEERS Stations

Import 2018 PEERS stations collected within the Wallkill basin.

# Import the 2018 PEERS stations, keep only the id and coordinates, and tag
# the record type for later filtering and map symbology.
stations_peers.df <- file.path(data.path,
                           "peers",
                           "peers_stations.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings=c(""," ","NA")) %>% 
  clean_up() %>% 
  select(station, latitude, longitude) %>% 
  mutate(type = "peers")

Add the PEERS stations to the stations.df data frame.

stations.df <- bind_rows(stations.df, stations_peers.df) 

Gage Stations

Import USGS gage stations found within the Wallkill basin.

# Import USGS gage stations within the Wallkill basin and tag the record
# type so they can be distinguished from sampling stations.
gage.df <- file.path(data.path,
                           "gage",
                           "GageSites.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings=c(""," ","NA")) %>% 
  clean_up() %>% 
  mutate(type = "gage")

Add the gaged stations to the stations.df data frame.

# Append the gage stations and flag them with a logical column.
# %in% already returns TRUE/FALSE (never NA), so wrapping it in
# if_else(..., TRUE, FALSE) was redundant.
stations.df <- bind_rows(stations.df, gage.df)  %>% 
  mutate(gage = type %in% "gage")

Station Order

Zachary Smith ordered the Wallkill 2017 and 2018 samples from upstream to downstream relative to the mainstem Wallkill.

# Upstream-to-downstream station ordering (Z. Smith), minus the excluded
# HABs stations. The left join keeps only ordered stations and preserves
# that ordering in stations.df.
station_order.df <- file.path(data.path,
                              "finalized",
                              "wallkill_station-order_up-to-down.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings=c(""," ","NA")) %>% 
  clean_up() %>% 
  filter(!station %in% ex.station.vec)
stations.df <- left_join(station_order.df, stations.df, by = "station")

BAP

Import Wallkill 2017 and 2018 BAP data and combine these into a single data frame.

  1. Data processed with the clean_up() function.
  2. Column name “location.station” changed to “station”
  3. The date column is converted to a date class
  4. The data is aggregated by station and date, and subsequently the mean and median BAP scores are calculated for these aggregates
  5. Stations “13-qker-0.9”, “13-walk-35.6”, and “13-wklei-0.6” were filtered out of the data frame because they were sampled as low gradient sites in 2018. These stations were sampled as kick-net samples in 2017. Both years are excluded.
# Import and combine the Wallkill 2017 and 2018 BAP files.
bap.df <- c("WALLKILL2017_BAP.csv",
            "WALLKILL2018_BAP.csv") %>% 
  map_df(function(file.i) {
    # Read each annual file; blank-like strings become NA.
    file.path(data.path,
              "bap",
              file.i) %>% 
      read.csv(stringsAsFactors = FALSE,
               na.strings=c(""," ","NA"))
  }) %>% 
  clean_up() %>% 
  rename(station = location.station) %>% 
  mutate(station = paste("13", station, sep = "-"),
         rivmile = factor(rivmile, sort(unique(rivmile), decreasing = TRUE)),
         date = as.Date(date, "%m/%d/%Y")) %>% 
  group_by(station, date) %>% 
  # Per station/date visit: replicate count plus mean/median BAP.
  mutate(replicates = n(),
         mean_bap = mean(bap, na.rm = TRUE),
         median_bap = median(bap, na.rm = TRUE)) %>% 
  ungroup() %>% 
  # Drop the 2018 low-gradient stations (both years) and the HABs stations.
  # Station ids at this point carry the "13-" prefix added above; the
  # original filter used unprefixed names and therefore never matched.
  filter(!station %in% c("13-qker-0.9",
                         "13-walk-35.6",
                         "13-wklei-0.6"),
         !station %in% ex.station.vec)

The data is exported as a CSV and manually added to the Wallkill SharePoint “finalized” folder.

# Export the finalized BAP table; the file is manually copied to the
# Wallkill SharePoint "finalized" folder.
data.table::fwrite(bap.df,
                   file.path(data.path,
                             "finalized",
                             "wallkill_bap_2017-2018.csv"))

Chemistry

Average Chemistry

Import the Wallkill average chemistry data.

# Station-level average total phosphorus (raw and log10), used later for
# the BAP-vs-TP plots.
tp.df <- file.path(data.path,
                   "chemistry",
                   "Wallkill_AVG_chem.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings=c(""," ","NA")) %>% 
  clean_up() %>% 
  rename(station = sbuid) %>% 
  select(station,
         tp_avg,
         tp_log10)

2017 and 2018 Chemistry

Import

Import Wallkill 2017 and 2018 Special Studies and 2018 PEERS chemistry data. Perform general data standardization with the clean_up() function. The 2018 Special Study data was QAQCed by Gavin Lemley (2019-04-29) using an R script developed by Alene Onion and Gavin Lemley. However, the 2017 Special Study chemistry and the 2018 PEERS data have not been QAQCed.

# 2017 Special Study chemistry (QAQCd 2019-04-29). "fraction" is forced to
# character so fraction codes survive import unaltered.
chem2017.df <- file.path(data.path,
                         "chemistry",
                         "2017_wallkill_chem_qaqcd-2019-04-29.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings = c(""," ","NA"),
           colClasses = c("fraction" = "character")) %>% 
  clean_up()

# 2018 Special Study chemistry (QAQCd by G. Lemley). project_name is added
# so the source is identifiable after the tables are appended.
chem2018.df <- file.path(data.path,
                         "chemistry",
                         "2018_wallkill_chem_qaqcd-2019-03-29.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings = c(""," ","NA"),
           colClasses = c("fraction" = "character")) %>% 
  clean_up() %>% 
  mutate(project_name = "wallkill special study 2018")

# 2018 PEERS chemistry (not QAQCd).
# NOTE(review): fraction is set to "t" for every row — presumably all
# results are totals; confirm with the PEERS data provider.
peers2018.df <- file.path(data.path,
                          "peers",
                         "PEERS.wallkill.output.site.names.fixed.csv") %>% 
  read.csv(stringsAsFactors = FALSE,
           na.strings = c(""," ","NA"),
           colClasses = c("fraction" = "character")) %>% 
  clean_up()  %>% 
  mutate(fraction = "t")

2018 PEERS

We will only retain the columns present in the QAQCed 2018 Special Study data. The script below identifies any columns present in the 2018 Special Study data but missing in the 2018 PEERS data. We will want to add these columns to the 2018 PEERS data, if possible.

names(chem2018.df)[!names(chem2018.df) %in% names(peers2018.df)]
## [1] "project_name" "siteid"

“project_name” was the one of the columns which was present in the 2018 Special Study data but absent from the 2018 PEERS data. The script below adds the column “project_name” to the 2018 PEERS data frame. The column is populated with the character string “wallkill peers 2018”, which follows the labeling scheme found in the 2018 Special Study data (i.e., wallkill special study 2018).

peers2018.df$project_name <- "wallkill peers 2018"

“siteid” is present in the 2018 Special Study data but absent from the 2018 PEERS data. The script below adds the column “siteid” to the 2018 PEERS data frame. The column is populated by extracting the basin, location, and rivermile elements from “sys_sample_code”, which follows the same pattern found in the 2018 Special Study data.

The regex was difficult to solve. I found the answer to this stackoverflow question to be helpful (https://stackoverflow.com/questions/25448921/regex-to-extract-all-characters-before-the-second-dash-hyphen). The description below is modified from Casimir et Hippolyte’s answer in the link above.

  1. \A = anchor for the start of the string
  2. (?: = open a non-capturing group
  3. [^-]++ = all that is not an hyphen (possessive)
  4. -?? = optional hyphen (lazy)
  5. ){3} = repeat the group 3 times
# Extract everything before the third hyphen of sys_sample_code
# (basin-location-rivermile): \A anchors the string start, [^-]++ is a
# possessive run of non-hyphens, -?? a lazy optional hyphen, and the group
# repeats three times. stringr uses ICU regexes, which support possessive
# quantifiers and \A.
peers2018.df$siteid <- str_extract(peers2018.df$sys_sample_code,
                                   "\\A(?:[^-]++-??){3}")

Now that the 2018 PEERS data contains all of the columns present in the 2018 Special Study data, the 2018 PEERS data is subset to only include columns that are found in the 2018 Special Study data.

peers2018.df <- peers2018.df[, names(peers2018.df) %in% names(chem2018.df)]

Append

The Wallkill 2017 and 2018 Special Studies and 2018 PEERS chemistry data are combined into a single data frame.

  1. Wallkill 2017 and 2018 Special Studies and 2018 PEERS chemistry data are appended together with bind_rows()
  2. The “date” column is converted to the data time class “POSIXct”
  3. “siteid” is renamed to “station”
  4. The columns are re-ordered using select()
  5. 2018 special study station “13-walk-0.8” is considered equivalent to “13-walk-0.7”, and therefore is relabeled as “13-walk-0.7”.
# Append all chemistry sources into one table.
# PEERS dates are "%m/%d/%Y"; Special Study dates are "%Y-%m-%d %H:%M:%S".
# The positional "%m/%d/%Y"/"%Y-%m-%d %H:%M:%S" argument reaches
# as.POSIXct's format parameter because tz is supplied by name.
# Note if_else() evaluates both branches for every row; the wrong-format
# branch yields NA there, but each row keeps its matching branch's value.
# 2018 station "13-walk-0.8" is relabeled to its equivalent "13-walk-0.7".
chem.df <- bind_rows(chem2017.df, chem2018.df, peers2018.df) %>% 
  mutate(date_time = if_else(project_name == "wallkill peers 2018",
                             as.POSIXct(sample_date, "%m/%d/%Y", tz = "EST"),
                             as.POSIXct(sample_date, "%Y-%m-%d %H:%M:%S", tz = "EST")),
         date_time = as.character(date_time)) %>% 
  rename(station = siteid) %>% 
  select(project_name, sys_sample_code, station,
         date_time, everything(), -sample_date) %>% 
  filter(!station %in% ex.station.vec) %>% 
  mutate(station = if_else(station %in% "13-walk-0.8", "13-walk-0.7", station))

The data is exported as a CSV and manually added to the Wallkill SharePoint “finalized” folder.

# Export the finalized chemistry table; the file is manually copied to the
# Wallkill SharePoint "finalized" folder.
data.table::fwrite(chem.df,
                   file.path(data.path,
                             "finalized",
                             "wallkill_chem_ras-2017-2018_peers-2018.csv"))

Clear the global environment of unnecessary objects.

rm(chem2017.df, chem2018.df, peers2018.df)

Station Locations

Import the Wallkill NHD clip created by Charles Stoll as a SpatialLinesDataFrame.

# File geodatabase holding the Wallkill NHD stream clip (C. Stoll).
nhd.gdb <- file.path(project.path, "data", "gis", "Wallkill_Clip_NHD.gdb")

# Read the stream network as a SpatialLinesDataFrame for leaflet overlays.
streams.polyline <- rgdal::readOGR(dsn = nhd.gdb, layer = "WALK_0_7_NHD_CLIP")
## OGR data source with driver: OpenFileGDB 
## Source: "C:\Users\zmsmith\OneDrive - New York State Office of Information Technology Services\project\nysdec\wallkill\data\gis\Wallkill_Clip_NHD.gdb", layer: "WALK_0_7_NHD_CLIP"
## with 9129 features
## It has 15 fields
## Warning in rgdal::readOGR(dsn = nhd.gdb, layer = "WALK_0_7_NHD_CLIP"): Z-
## dimension discarded

Create a function to standardize leaflet plots.

leaflet_plot <- function(x) {
  # Render an interactive leaflet map of the supplied station table over the
  # Wallkill NHD stream network (global streams.polyline). Stations are
  # colored by their gage flag and carry an HTML popup with key attributes.
  #
  # Args:
  #   x: data frame with longitude, latitude, gage, station, stream, and
  #      type columns.
  marker.pal <- colorFactor(c("#508b1a", "#551a8b"),
                            domain = unique(x$gage))
  
  # Build the popup text once, one HTML block per station row.
  popup.text <- paste(
    paste("<b>Station:</b>", x$station),
    paste("<b>Stream:</b>", x$stream),
    paste("<b>Longitude:</b>", x$longitude),
    paste("<b>Latitude:</b>", x$latitude),
    paste("<b>Type:</b>", x$type),
    sep = "<br/>"
  )
  
  leaflet(x) %>% 
    addTiles() %>% 
    addPolylines(data = streams.polyline,
                 weight = 2,
                 opacity = 1) %>% 
    addCircleMarkers(~longitude, ~latitude,
                     color = ~marker.pal(gage),
                     stroke = FALSE,
                     fillOpacity = 1.00,
                     popup = popup.text)
}

Plot the Wallkill locations on an interactive map.

leaflet_plot(stations.df)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Quick static check of station coordinates, colored by the
# upstream-to-downstream order value.
ggplot(data = stations.df, aes(x = longitude, y = latitude, color = order)) +
  geom_point() 

Data Summary

Chemistry

2017 Special Study

  • Description:
  • Intent of Collection: To update Priority Waterbody List (PWL) segments.
  • Collected By: NYSDEC
  • Date Range: 2017-07-19 to 2017-10-12 (85 days)
  • Number of Stations: 13
  • List of Stations: 13-gunk-0.4, 13-monh-0.4, 13-qker-0.9, 13-walk-19.0, 13-walk-2.1, 13-walk-22.8, 13-walk-26.9, 13-walk-29.9, 13-walk-35.6, 13-walk-46.6, 13-walk-60.1, 13-wchee-0.6, 13-wklei-0.6
  • Number of Parameters: 24
  • List of Parameters: alkalinity, total (as caco3), aluminum, arsenic, cadmium, chloride (as cl), chlorophyll a, conductivity at 25 degrees celsius, copper, hardness (as caco3), iron, lead, magnesium, nickel, nitrate+nitrite as nitrogen, nitrogen, ammonia (as n), nitrogen, nitrate (as n), nitrogen, nitrite, ph, phosphorus, total (as p), silver, temperature of ph analysis, total dissolved solids (residue, filterable), turbidity, zinc
  • Sampling Frequency:
Project_Name Station Sample_Frequency
wallkill special study 2017 13-gunk-0.4 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-monh-0.4 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-qker-0.9 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-19.0 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-2.1 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-22.8 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-26.9 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-29.9 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-35.6 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-46.6 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-walk-60.1 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-wchee-0.6 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2017 13-wklei-0.6 Sampled on average every 21 days (number of visits = 5)

2018 Special Study

  • Description:
  • Intent of Collection: To update Priority Waterbody List (PWL) segments. This study expands the spatial and temporal coverage of the 2017 Special Study.
  • Collected By: NYSDEC
  • Date Range: 2018-07-10 to 2018-10-04 (86 days)
  • Number of Stations: 25
  • List of Stations: 13-dwar-2.0, 13-gunk-0.4, 13-maso-0.2, 13-monh-0.4, 13-monh-4.1, 13-pkil-0.4, 13-poch-1.8, 13-qker-0.9, 13-riog-0.7, 13-rutg-1.5, 13-swak-1.7, 13-tinw-0.5, 13-walk-0.7, 13-walk-19.0, 13-walk-2.1, 13-walk-22.8, 13-walk-26.9, 13-walk-29.9, 13-walk-35.6, 13-walk-44.4, 13-walk-46.6, 13-walk-60.1, 13-walk-9.8, 13-wchee-0.6, 13-wklei-0.6
  • Number of Parameters: 24
  • List of Parameters: alkalinity, total (as caco3), aluminum, arsenic, cadmium, calcium, chloride (as cl), chlorophyll a, copper, hardness (as caco3), iron, lead, magnesium, nickel, nitrate+nitrite as nitrogen, nitrogen, nitrogen, ammonia (as n), nitrogen, kjeldahl, total, nitrogen, nitrate (as n), nitrogen, nitrite, phosphorus, total (as p), silver, total dissolved solids (residue, filterable), turbidity, zinc
  • Sampling Frequency:
Project_Name Station Sample_Frequency
wallkill special study 2018 13-dwar-2.0 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-gunk-0.4 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-maso-0.2 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-monh-0.4 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-monh-4.1 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-pkil-0.4 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-poch-1.8 Sampled on average every 24 days (number of visits = 4)
wallkill special study 2018 13-qker-0.9 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-riog-0.7 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-rutg-1.5 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-swak-1.7 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-tinw-0.5 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-0.7 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-19.0 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-2.1 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-22.8 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-26.9 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-29.9 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-35.6 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-44.4 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-46.6 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-60.1 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-walk-9.8 Sampled on average every 28 days (number of visits = 4)
wallkill special study 2018 13-wchee-0.6 Sampled on average every 21 days (number of visits = 5)
wallkill special study 2018 13-wklei-0.6 Sampled on average every 21 days (number of visits = 5)

2018 PEERS

  • Description: The purpose of the PEERS program is a collaboration between the NYS Department of Environmental Conservation (NYSDEC) Stream Section and self-funded stream monitoring projects outside the NYSDEC to ensure data collected satisfy NYSDEC QA criteria and may be used to augment the NYSDEC Streams Section’s Water Column Chemistry and Biological (macroinvertebrate) data sets.
  • Intent of Collection: The purpose of this PEERS project is to augment NYSDEC’s professional sampling effort and to address local concerns about stormwater runoff and septic impacts.
  • Collected By: Professional External Evaluations of Rivers and Streams (PEERS)
  • Date Range: 2018-07-10 to 2018-10-02 (84 days)
  • Number of Stations: 9
  • List of Stations: 13-maso-2.8, 13-pkil-5.7, 13-poch-2.6, 13-riog_t1-0.8, 13-rutg-9.3, 13-tinw_t3-2.1, 13-tinw-4.5, 13-walk_t13-0.7, 13-walk_t15-0.1
  • Number of Parameters: 8
  • List of Parameters: nitrate+nitrite as nitrogen, nitrogen, nitrogen, ammonia (as n), nitrogen, kjeldahl, total, nitrogen, nitrate (as n), nitrogen, nitrite, phosphorus, total (as p), turbidity
  • Sampling Frequency:
Project_Name Station Sample_Frequency
wallkill peers 2018 13-maso-2.8 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-pkil-5.7 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-poch-2.6 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-riog_t1-0.8 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-rutg-9.3 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-tinw-4.5 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-tinw_t3-2.1 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-walk_t13-0.7 Sampled on average every 21 days (number of visits = 5)
wallkill peers 2018 13-walk_t15-0.1 Sampled on average every 21 days (number of visits = 5)

BAP Plots

Brian Duffy requested… “Boxplot BAP with LogTP (avg or scatter) -highlight mainstem”

Preprocess

# Join BAP observations with the average TP values and the
# upstream-to-downstream ordering. Full joins keep stations missing from
# either side (the plots below warn about the resulting NAs), and station
# becomes a factor ordered along the river.
final.df <- bap.df %>% 
    select(station,
         location,
         rivmile,
         date,
         bap) %>% 
  full_join(tp.df, by = "station") %>% 
  full_join(station_order.df, by = "station") %>% 
  arrange(order) %>% 
  mutate(station = factor(station, unique(station)))

Boxplot

# BAP boxplots per station with log10 TP overlaid in red. TP is doubled so
# it spans the 0-10 BAP axis; the secondary axis divides by 2 to label the
# original log10 TP scale.
final.df %>% 
  ggplot(aes(station, bap)) +
  geom_boxplot() +
  geom_point(aes(y = tp_log10 * 2), color = "red") +
  xlab("Station") +
  ylab("BAP") +
  scale_y_continuous(sec.axis = sec_axis(~. / 2, name = "Log10 TP")) +
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1,
                                   vjust = 0.5))
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
## Warning: Removed 143 rows containing missing values (geom_point).

Scatter Plot

# BAP vs log10 TP scatter plot, highlighting mainstem Wallkill ("walk"
# location) stations against tributaries.
final.df %>% 
  mutate(wallkill = if_else(location == "walk", "Wallkill", "Tributary")) %>% 
  ggplot(aes(tp_log10, bap, color = wallkill)) +
  geom_point() +
  xlab("Log10 TP") +
  ylab("BAP") 
## Warning: Removed 156 rows containing missing values (geom_point).

Heatmap

Preprocessing

On 4/29/2019, Brian Duffy suggested summarizing data collected from a station by year (i.e., find the mean value for each year).

BAP

BAP is rescaled to represent values between 0 and 1 by dividing by 10 (the max BAP score) and then subtracting the result from 1, so that higher normalized values indicate greater degradation, consistent with the normalized chemistry values.

# Mean BAP per station/date, rescaled to [0, 1] and inverted (1 = worst)
# so it is comparable with the normalized chemistry values.
heat_bap.df <- bap.df %>% 
  group_by(station,
           date) %>% 
  summarize(bap = mean(bap, na.rm = TRUE)) %>% 
  ungroup() %>% 
  # Expose the sample date as character "date_time" to match heat_chem.df.
  # The original kept a Date column named "date", so the downstream
  # bind_rows()/month(date_time) left every BAP row with an NA month,
  # collapsing all BAP dates into a single group per station.
  mutate(normalized = 1 - (bap / 10),
         param = "bap",
         date_time = as.character(date)) %>% 
  select(station, date_time, param, normalized)

Chemistry

# Distinct analyte names, kept for reference/inspection (see the commented
# kable call below).
chem.ref.df <- chem.df %>% 
  select(chemical_name) %>% 
  distinct()

# knitr::kable(chem.ref.df)
# Normalize Special Study chemistry to [0, 1] per analyte:
#   1. Drop PEERS records (not QAQCd).
#   2. Treat non-detects as 0 (identified via interpreted_qualifiers;
#      NA qualifiers are first recoded to "blank" so grepl() sees a string).
#   3. Average replicates per station/date/analyte.
#   4. Min-max scale within each analyte so parameters are comparable.
heat_chem.df <- chem.df %>% 
  filter(project_name != "wallkill peers 2018") %>% 
  mutate(interpreted_qualifiers = if_else(is.na(interpreted_qualifiers), "blank", interpreted_qualifiers),
         result_value = if_else(grepl("analyte was analyzed for but not detected", interpreted_qualifiers),
                                0,
                                result_value)) %>% 
  select(station,
         date_time,
         chemical_name,
         result_value) %>% 
  group_by(station,
         date_time,
         chemical_name) %>% 
  summarize(result_value = mean(result_value, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(chemical_name) %>% 
  mutate(obs_min = min(result_value, na.rm = TRUE),
         obs_max = max(result_value, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(normalized = (result_value - obs_min) / (obs_max - obs_min)) %>% 
  rename(param = chemical_name) %>% 
  select(station, date_time, param, normalized)

# phosphorus 0 - 75
# Chlorophyll a 0-6
# 

Finalize

# Stack normalized BAP and chemistry, collapse dates to calendar month,
# and average within station/month/parameter.
# NOTE(review): this expects a parseable "date_time" column from both
# inputs — confirm heat_bap.df supplies one.
heat.df <- bind_rows(heat_bap.df,
                     heat_chem.df) %>% 
  mutate(date_time = lubridate::month(date_time)) %>% 
  group_by(station, date_time, param) %>% 
  summarize(normalized = mean(normalized, na.rm = TRUE)) %>% 
  ungroup()
# Chemistry-only heatmap matrix: one row per parameter, one column per
# station-month combination.
heat.mat <- heat.df %>% 
  filter(param != "bap") %>%
  unite(site, c("station", "date_time")) %>% 
  spread(site, normalized) %>% 
  tibble::column_to_rownames("param") %>% 
  as.matrix()

# row_names_max_width is not a stats::heatmap() argument (it triggered
# "not a graphical parameter" warnings and was ignored); margins widens
# the label areas instead (columns, rows).
heatmap(heat.mat,
        col = viridis::inferno(256),
        margins = c(5, 10))
## Warning in plot.window(...): "row_names_max_width" is not a graphical
## parameter
## Warning in plot.xy(xy, type, ...): "row_names_max_width" is not a graphical
## parameter
## Warning in title(...): "row_names_max_width" is not a graphical parameter

# Rank station-months by their overall mean normalized value and display
# as a single-column tile strip.
heat.df %>% 
  unite(site, c("station", "date_time")) %>% 
  group_by(site) %>% 
  summarize(normalized = mean(normalized, na.rm = TRUE)) %>% 
  arrange(normalized) %>% 
  mutate(site = factor(site, unique(site))) %>% 
  ggplot(aes(1, site, fill = normalized)) +
  geom_tile() +
  scale_fill_viridis_c(option = "inferno")

# Full heatmap matrix including BAP alongside the chemistry parameters.
heat.mat <- heat.df %>% 
  unite(site, c("station", "date_time")) %>% 
  spread(site, normalized) %>% 
  tibble::column_to_rownames("param") %>% 
  as.matrix()

# row_names_max_width is not a stats::heatmap() argument; margins widens
# the label areas instead (columns, rows).
heatmap(heat.mat,
        col = viridis::inferno(256),
        margins = c(5, 10))
# Nutrient-only heatmap matrix.
heat.nutrient <- heat.df %>% 
  unite(site, c("station", "date_time")) %>% 
  filter(param %in% c("nitrate+nitrite as nitrogen",
                      "nitrogen",
                      "nitrogen, ammonia (as n)",
                      "nitrogen, kjeldahl, total",
                      "nitrogen, nitrate (as n)",
                      "nitrogen, nitrite",
                      "phosphorus, total (as p)")) %>% 
  spread(site, normalized) %>% 
  tibble::column_to_rownames("param") %>% 
  as.matrix()

# row_names_max_width is not a stats::heatmap() argument (it triggered
# "not a graphical parameter" warnings); margins widens the label areas
# instead (columns, rows).
heatmap(heat.nutrient,
        col = viridis::inferno(256),
        margins = c(5, 10))
## Warning in plot.window(...): "row_names_max_width" is not a graphical
## parameter
## Warning in plot.xy(xy, type, ...): "row_names_max_width" is not a graphical
## parameter
## Warning in title(...): "row_names_max_width" is not a graphical parameter

# Metals-only heatmap matrix.
# NOTE(review): this reuses the heat.nutrient name for metals; kept as-is
# to avoid breaking any downstream references, but a rename (e.g.
# heat.metal) would be clearer.
heat.nutrient <- heat.df %>% 
  unite(site, c("station", "date_time")) %>% 
  filter(param %in% c("aluminum",
                      "copper",
                      "iron",
                      "lead",
                      "magnesium",
                      "silver",
                      "zinc")) %>% 
  spread(site, normalized) %>% 
  tibble::column_to_rownames("param") %>% 
  as.matrix()

# row_names_max_width is not a stats::heatmap() argument (it triggered
# "not a graphical parameter" warnings); margins widens the label areas
# instead (columns, rows).
heatmap(heat.nutrient,
        col = viridis::inferno(256),
        margins = c(5, 10))
## Warning in plot.window(...): "row_names_max_width" is not a graphical
## parameter
## Warning in plot.xy(xy, type, ...): "row_names_max_width" is not a graphical
## parameter
## Warning in title(...): "row_names_max_width" is not a graphical parameter